Dobre Bogdan-Mihai, Moldovan George, Mocanu Alexandru
07 decembrie 2020
Reprezentarea datelor sub forma de retea e o abordare diferita fata de modul clasic de reprezentare, cel tabelar. Acest format poate evidentia noi caracteristici ale datelor si imbunatesteste vizualizarea acestora intr-un mod semnificativ.
Pentru a realiza o analiza originala, setul de date folosit reprezinta structura unei retele de trafic cu tigari de contrabanda din Romania, datele fiind extrase dintr-un dosar penal. Astfel, utilizand metode de graph mining vom realiza o analiza asupra structurii retelei, dar si asupra rolurilor individuale ale membrilor.
Datele au fost modelate folosind un obiect de tip reţea din librăria statnet. Legăturile dintre noduri au fost introduse folosind o lista de muchii, iar nodurile au următoarele atribute : nume, nume abreviat si rol.
netmat <- rbind(c(1,2),
c(1,3),
c(2,3),
c(1,4),
c(5,6),
c(7,8),
c(5,7),
c(5,8),
c(5,9),
c(6,7),
c(6,8),
c(6,9),
c(5,10),
c(6,10),
c(11,12),
c(11,13),
c(13,14),
c(14,19),
c(13,19),
c(14,1),
c(19,15),
c(19,16),
c(19,17),
c(19,18),
c(12,15),
c(12,16),
c(12,17),
c(12,18),
c(20,8),
c(20,9),
c(21,8),
c(21,9),
c(3,8),
c(3,9),
c(1,8),
c(1,9))
net <- network(netmat, matrix.type="edgelist")netmatsym <- symmetrize(as.sociomatrix(net), rule ="weak")
netsym <- network(netmatsym, matrix.type="adjacency")
network.vertex.names(netsym) <- c("B***cu L***na",
"B***cu An***us",
"B**scu C***nel",
"B**hiu G***ge",
"M**tu M**na",
"Ma**u I***he",
"T**a F**p",
"T**a G***ghe",
"S**m An**la",
"G**ca G****ghe",
"C**u I**n",
"M***u L**do",
"D**a D**a",
"D**a C**l",
"N**cu P**u",
"N**se T**er",
"S***an C***tin",
"O***u A**ei",
"D**a I***l",
"P**ci V***e",
"D***mir R**a")
set.vertex.attribute(netsym, "role", c("C",
"C",
"C",
"CR",
"C",
"C",
"CT",
"CT",
"CT",
"C",
"C",
"A",
"A",
"C",
"C",
"C",
"C",
"C",
"CT",
"D",
"D"))
# C : Comerciant, CR : Cartita, CT: contrabandist, A: aducator clienti, D: depozitare
set.vertex.attribute(netsym, "abrev_name", c("BL",
"BA",
"BC",
"BG",
"MM",
"MI",
"TF",
"TG",
"SA",
"GG",
"CI",
"ML",
"DD",
"DC",
"NP",
"NT",
"SC",
"OA",
"DI",
"PV",
"DR"))
netsym %v% "alldeg" <- degree(netsym)
namelab <- get.vertex.attribute(netsym, "vertex.names")
rolelab <- get.vertex.attribute(netsym, "role")
abrevnamelab <-get.vertex.attribute(netsym, "abrev_name")
my_pal <- brewer.pal(5,"Dark2")
rolecat <- as.factor(get.vertex.attribute(netsym,"role"))plot(netsym,
main = "Infractional network",
usearrows=FALSE,
mode="fruchtermanreingold",
vertex.col = my_pal[rolecat],
label=rolelab,
displaylabels=T,
vertex.cex = 1.5)O prima analiza asupra retelei este realizarea rezumatului in 5 puncte. Functiile prezente in libraria statnet faciliteaza realizarea acesteia. Analizand aceste valori, putem avea o prima impresie despre structura retelei si despre modul de organizarea a acesteia.
## [1] "BASIC CHARACTERISTICS"
## Network attributes:
## vertices = 21
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 72
## missing edges = 0
## non-missing edges = 72
## density = 0.1714286
##
## Vertex attributes:
##
## abrev_name:
## character valued attribute
## attribute summary:
## the 10 most common values are:
## BA BC BG BL CI DC DD DI DR GG
## 1 1 1 1 1 1 1 1 1 1
##
## alldeg:
## numeric valued attribute
## attribute summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 6.000 6.857 10.000 14.000
##
## role:
## character valued attribute
## attribute summary:
## A C CR CT D
## 2 12 1 4 2
## vertex.names:
## character valued attribute
## 21 valid vertex names
##
## No edge attributes
## [1] "Size:"
## [1] 21
## [1] "Density:"
## [1] 0.1714286
## [1] "Components:"
## [1] 1
## [1] "Diameter:"
## [1] 7
## [1] "Transitivity:"
## [1] 0.25
Folosind atributele definite în momentul creării, putem filtra reţeua astfel încat putem evidenţia importanţa unui anume rol. Spre exemplu, dacă am păstra în reţea doar Comercianţii, putem observa că aceştia sunt în mare partea izolaţi, distrugând aspectul de reţea compactă. Acest lucru evidenţiază rolul contrabandiştilor în reţea, aceştia asigurând practic conexitatea reţelei.
print("Filtering networks")
print(get.vertex.attribute(netsym, "role"))
comercianti <- get.inducedSubgraph(netsym, which (netsym %v% "role"=="C"))
gplot(comercianti,displaylabels=TRUE, main="Comercianti")delete.vertices(comercianti, isolates(comercianti))
gplot(comercianti, displaylabels = TRUE, main="Grupuri de comercianti")gplot(netsym,gmode="graph",edge.col="grey75",displaylabels=T,
vertex.cex=1.5,mode='circle',main="circle")gplot(netsym,gmode="graph",edge.col="grey75",displaylabels=T,
vertex.cex=1.5,mode='eigen',main="eigen")gplot(netsym,gmode="graph",edge.col="grey75",displaylabels=T,
vertex.cex=1.5,mode='random',main="random")gplot(netsym,gmode="graph",edge.col="grey75",displaylabels=T,
vertex.cex=1.5,mode='spring',main="spring")gplot(netsym,gmode="graph",edge.col="grey75",displaylabels=T,
vertex.cex=1.5,mode='fruchtermanreingold',main='fruchtermanreingold')gplot(netsym,gmode="graph",edge.col="grey75",displaylabels=T,
vertex.cex=1.5,mode='kamadakawai',
main='kamadakawai')library(network)
library(intergraph)
library(igraph)
library(networkD3)
plot(netsym,vertex.cex=0.5,main="Too small nodes")sidenum <- 3:7
rolecat <- as.factor(get.vertex.attribute(asIgraph(netsym),"role"))
plot(netsym,usearrows=FALSE,vertex.cex=4, main="Different node type",
displaylabels=F,vertex.sides=sidenum[rolecat])n_edge <- network.edgecount(netsym)
linecol_pal <- c("blue","red","green")
edge_cat <- sample(1:3,n_edge,replace=T)
plot(netsym,vertex.cex=1.5,vertex.col="grey25", main="Edge coloring example",
edge.col=linecol_pal[edge_cat],edge.lwd=2)n_edge <- network.edgecount(netsym)
edge_cat <- sample(1:3,n_edge,replace=T)
line_pal <- c(2,3,4)
gplot(netsym,vertex.cex=0.8,gmode="graph", main="Different edge type",
vertex.col="gray50",edge.lwd=1.5,
edge.lty=line_pal[edge_cat])my_pal <- brewer.pal(5,"Dark2")
rolecat <- as.factor(get.vertex.attribute(asIgraph(netsym),"role"))
plot(netsym,
main = "Infractional network",
usearrows=FALSE,
mode="fruchtermanreingold",
vertex.col = my_pal[rolecat],
label=abrevnamelab,
displaylabels=T,
vertex.cex = 1.5)
legend("bottomleft",legend=c("Aducator clienti","Comerciant","Cartita","Contrabandist","Depozitare"),
col=my_pal,pch=19,pt.cex=1.5,bty="n",
title="Criminal Role")#Visnetwork
library(visNetwork)
inetsym_edge <- get.edgelist(inetsym)
inetsym_edge <- data.frame(from = inetsym_edge[,1],
to = inetsym_edge[,2])
inetsym_nodes <- data.frame(id = as.numeric(V(inetsym)))
visNetwork(inetsym_nodes, inetsym_edge, width = "100%")##
## Attaching package: 'igraph'
## The following objects are masked from 'package:sna':
##
## betweenness, bonpow, closeness, components, degree, dyad.census,
## evcent, hierarchy, is.connected, neighborhood, triad.census
## The following objects are masked from 'package:network':
##
## %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
## get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
## is.directed, list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(intergraph)
### Transfer network from statnet format to igraph format
inetsym <- as.undirected(asIgraph(netsym))
V(inetsym)$name <- netsym %v% "abrev_name"
V(inetsym)$fullname <- network.vertex.names(netsym)
V(inetsym)$role <- rolecat
## Cliques
### Determine the cliques from the network as well as the biggest clique.
clique.number(inetsym)## [1] 4
## [[1]]
## + 3/21 vertices, named, from 9c55256:
## [1] MI TF TG
##
## [[2]]
## + 3/21 vertices, named, from 9c55256:
## [1] DD DC DI
##
## [[3]]
## + 3/21 vertices, named, from 9c55256:
## [1] BL BA BC
##
## [[4]]
## + 3/21 vertices, named, from 9c55256:
## [1] BL BC SA
##
## [[5]]
## + 3/21 vertices, named, from 9c55256:
## [1] BL BC TG
##
## [[6]]
## + 3/21 vertices, named, from 9c55256:
## [1] MM MI TF
##
## [[7]]
## + 4/21 vertices, named, from 9c55256:
## [1] MM MI TF TG
##
## [[8]]
## + 3/21 vertices, named, from 9c55256:
## [1] MM MI GG
##
## [[9]]
## + 3/21 vertices, named, from 9c55256:
## [1] MM MI SA
##
## [[10]]
## + 3/21 vertices, named, from 9c55256:
## [1] MM MI TG
##
## [[11]]
## + 3/21 vertices, named, from 9c55256:
## [1] MM TF TG
## [[1]]
## + 3/21 vertices, named, from 9c55256:
## [1] BA BL BC
##
## [[2]]
## + 3/21 vertices, named, from 9c55256:
## [1] DC DD DI
##
## [[3]]
## + 3/21 vertices, named, from 9c55256:
## [1] GG MM MI
##
## [[4]]
## + 4/21 vertices, named, from 9c55256:
## [1] TF MM TG MI
##
## [[5]]
## + 3/21 vertices, named, from 9c55256:
## [1] MM MI SA
##
## [[6]]
## + 3/21 vertices, named, from 9c55256:
## [1] BC BL TG
##
## [[7]]
## + 3/21 vertices, named, from 9c55256:
## [1] BC BL SA
## [[1]]
## + 4/21 vertices, named, from 9c55256:
## [1] TG MM MI TF
## coreness
## 1 2 3
## 1 13 7
## [1] 3
colors <- rainbow(maxCoreness)
plot(inetsym,vertex.label=coreness,vertex.color=colors[coreness],layout=layout_with_fr)i1_3 <- inetsym
i2_3 <- induced.subgraph(inetsym, vids=which(coreness > 1))
i3_3 <- induced.subgraph(inetsym, vids=which(coreness > 2))
lay <- layout.fruchterman.reingold(inetsym)
op <- par(mfrow=c(1,3),mar = c(3,0,2,0))
plot(i1_3,layout=lay,vertex.label=coreness,vertex.color=colors[coreness],main="All k-cores")
plot(i2_3,layout=lay[which(coreness > 1),],vertex.label=coreness[which(coreness > 1)],vertex.color=colors[coreness[which(coreness > 1)]],main="k-cores 2-3")
plot(i3_3,layout=lay[which(coreness > 2),],vertex.label=coreness[which(coreness > 2)],vertex.color=colors[coreness[which(coreness > 2)]],main="k-cores 3")par(op)
## Modularity is a measure that describes how good is a network clusterization
colors <- brewer.pal(5,"Dark2")
roles <- c("C","CR","CT","A","D")
V(inetsym)[V(inetsym)$role == "C"]$color <- colors[1]
V(inetsym)[V(inetsym)$role == "CR"]$color <- colors[2]
V(inetsym)[V(inetsym)$role == "CT"]$color <- colors[3]
V(inetsym)[V(inetsym)$role == "A"]$color <- colors[4]
V(inetsym)[V(inetsym)$role == "D"]$color <- colors[5]
V(inetsym)[V(inetsym)$role == "C"]$group <- 1
V(inetsym)[V(inetsym)$role == "CR"]$group <- 2
V(inetsym)[V(inetsym)$role == "CT"]$group <- 3
V(inetsym)[V(inetsym)$role == "A"]$group <- 4
V(inetsym)[V(inetsym)$role == "D"]$group <- 5
op <- par(mfrow=c(1,1))
plot(inetsym,vertex.color=V(inetsym)$color,vertex.size=10)## [1] 0
## The result is smaller than 0, which means a bad clusterization result using this method
## Community detection algorithms
cw <- cluster_walktrap(inetsym)
modularity(cw)## [1] 0.4903549
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2
## [1] 0.4903549
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 2 2
## [1] 0.4903549
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 2 2 2 2 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 1 1
## [1] 0.4695216
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 1 1 1 1 3 3 3 3 1 3 2 2 2 2 2 2 2 2 2 1 1
## [1] 0.4185957
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 1 1 2 1 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 1 2
## [1] 0.464892
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 1 1 1 1 3 3 3 3 1 3 2 2 2 2 2 2 2 2 2 3 3
## [1] 0.4903549
## BL BA BC BG MM MI TF TG SA GG CI ML DD DC NP NT SC OA DI PV DR
## 3 3 3 3 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 1 1
##
## 1 2 3
## 1 2 0 0
## 2 6 3 3
## 3 0 0 1
## 4 1 3 0
## 5 0 2 0
## [1] 0.02816901
## [1] 1
## [1] 1
## [1] 0.7075812
op <- par(mfrow=c(3,2),mar=c(3,0,2,0))
plot(ceb, inetsym,vertex.label=V(inetsym)$name,main="Edge Betweenness")
plot(cfg, inetsym,vertex.label=V(inetsym)$name,main="Fastgreedy")
plot(clp, inetsym,vertex.label=V(inetsym)$name,main="Label Propagation")
plot(cle, inetsym,vertex.label=V(inetsym)$name,main="Leading Eigenvector")
plot(cs, inetsym,vertex.label=V(inetsym)$name,main="Spinglass")
plot(cw, inetsym,vertex.label=V(inetsym)$name,main="Walktrap")## Trying to generate a similar network using Erdos-Renyi method
no_nodes <- length(V(inetsym))
no_edges <- length(E(inetsym))
generated_network <- erdos.renyi.game(n=no_nodes,no_edges,type='gnm')
op <- par(mfrow=c(1,2))
plot(inetsym,vertex.label=NA,vertex.size=5)
plot(generated_network, vertex.label=NA, vertex.size=5)par(op)
## Trying to generate a similar network using Small-World Model
avg_degree <- no_edges/no_nodes*2
g1 <- watts.strogatz.game(dim=1, size=no_nodes, nei=avg_degree/2, p=.05)
g2 <- watts.strogatz.game(dim=1, size=no_nodes, nei=avg_degree/2, p=.15)
g3 <- watts.strogatz.game(dim=1, size=no_nodes, nei=avg_degree/2, p=.30)
op <- par(mfrow=c(2,2))
plot(inetsym,vertex.label=NA,vertex.size=5)
plot(g1, vertex.label=NA, vertex.size=5)
plot(g2, vertex.label=NA, vertex.size=5)
plot(g3, vertex.label=NA, vertex.size=5)par(op)
## Trying to generate a similar network using Scale-Free Model
barabasi_network <- barabasi.game(no_nodes, directed=FALSE)
op <- par(mfrow=c(1,2))
plot(inetsym,vertex.label=NA, vertex.size=5)
plot(barabasi_network,vertex.label=NA, vertex.size=5)par(op)
## Comparing random models with the empirical network
list_network <- c(generated_network, g2, barabasi_network, inetsym)
comparison_table <- data.frame(
Name = c("Erdos-Renyi", "Small world", "Scale-free model", "Empiric network"),
Size = c(length(V(generated_network)), length(V(g2)), length(V(barabasi_network)), length(V(inetsym))),
Density = c(gden(asNetwork(generated_network)),gden(asNetwork(g2)),gden(asNetwork(barabasi_network)),gden(asNetwork(inetsym))),
Avg_Degree = c(length(E(generated_network))/length(V(generated_network)),length(E(g2))/length(V(g2)),length(E(barabasi_network))/length(V(barabasi_network)),length(E(inetsym))/length(V(inetsym))),
Transitivity = c(transitivity(generated_network), transitivity(g2), transitivity(barabasi_network), transitivity(inetsym)),
Isolates = c(sum(degree(generated_network)==0),sum(degree(g2)==0),sum(degree(barabasi_network)==0),sum(degree(inetsym)==0))
)
comparison_table## Name Size Density Avg_Degree Transitivity Isolates
## 1 Erdos-Renyi 21 0.1714286 1.714286 0.2086957 0
## 2 Small world 21 0.1000000 1.000000 0.0000000 1
## 3 Scale-free model 21 0.0952381 0.952381 0.0000000 0
## 4 Empiric network 21 0.1714286 1.714286 0.2500000 0